# Load the business table, trim the first and last character of `name` and
# `address` (presumably surrounding quote marks -- confirm against the CSV),
# and drop rows from two neighborhoods.
business <- read_csv("./data/business.csv") %>%
  mutate(
    name = str_sub(name, 2, -2),
    address = str_sub(address, 2, -2)
  ) %>%
  # Rows with a missing neighborhood are dropped as well, because the
  # condition evaluates to NA for them.
  filter(!(neighborhood == "Downtown Tampa" | neighborhood == "North Valley"))

# Long-format lookup of business categories: one row per
# (business_id, category) pair.
#
# `separate_rows()` splits the ";"-delimited `categories` string into as many
# rows as each business needs. Splitting into a fixed set of 25 `cat*` columns
# dropped any category beyond the 25th (with only a warning) and padded
# shorter lists with NA rows.
categories <- business %>%
  select(business_id, category = categories) %>%
  separate_rows(category, sep = ";")
  
# Unique ids of every business carrying the exact category "Restaurants".
restaurant_ids <- distinct(
  filter(categories, category %in% "Restaurants"),
  business_id
)

# Keep businesses whose category string mentions a restaurant or food
# category.
#
# `str_detect()` returns the logical flag both tests need. Comparing the
# result of `str_match()` with TRUE compares the matched text to TRUE, which
# never holds, so a ";Food" test written that way never selects a row.
# NOTE(review): both patterns start with ";", so a category listed first in
# the string is not matched -- confirm whether that is intended.
restaurants <- business %>%
  filter(
    str_detect(categories, ";Restaurant") |
      str_detect(categories, ";Food")
  )
# Read the attributes table, clean its column names with janitor, and keep
# only the business key and the `alcohol` column.
attributes <- select(
  janitor::clean_names(read_csv("./data/attributes.csv")),
  business_id,
  alcohol
)
# Attach the `alcohol` attribute to each restaurant; restaurants without a
# matching attributes row get NA.
restaurants <- left_join(restaurants, attributes, by = "business_id")
# Number of restaurants per neighborhood, smallest count first.
restaurants %>%
  count(neighborhood) %>%
  arrange(n) %>%
  knitr::kable()
neighborhood n
Anthem 10
The Lakes 19
University 55
South Summerlin 64
Summerlin 95
Centennial 111
Sunrise 129
Northwest 143
Southwest 152
Chinatown 224
Downtown 277
Spring Valley 347
Eastside 357
Southeast 431
Westside 449
The Strip 613
# Cross-tabulate neighborhoods against star ratings: one row per
# neighborhood, one column per rating, each cell holding the number of
# distinct businesses (NA where a combination does not occur).
restaurants %>%
  distinct(business_id, neighborhood, stars) %>%
  count(neighborhood, stars) %>%
  spread(key = stars, value = n) %>%
  knitr::kable()
neighborhood 1 1.5 2 2.5 3 3.5 4 4.5 5
Anthem NA NA 1 NA 2 3 2 2 NA
Centennial 1 4 4 14 25 29 21 11 2
Chinatown NA 1 6 9 35 59 65 42 7
Downtown 1 4 9 15 40 56 89 45 18
Eastside 1 9 24 38 58 72 100 48 7
Northwest NA 3 9 17 25 41 30 13 5
South Summerlin NA NA 1 4 11 24 23 1 NA
Southeast 1 9 34 52 62 104 107 45 17
Southwest 1 4 9 16 21 38 42 18 3
Spring Valley NA 4 18 28 44 84 87 72 10
Summerlin 1 2 6 5 22 25 23 9 2
Sunrise 1 9 15 14 20 23 28 18 1
The Lakes NA NA NA NA 3 5 8 3 NA
The Strip 1 10 28 81 125 174 147 38 9
University NA 1 2 3 10 16 13 9 1
Westside 2 9 14 38 60 111 129 65 21
# Mean review count per neighborhood; `review_count` is coerced to numeric
# before averaging.
restaurants %>%
  group_by(neighborhood) %>%
  summarise(Average_Number_of_Reviews = mean(as.numeric(review_count))) %>%
  knitr::kable()
neighborhood Average_Number_of_Reviews
Anthem 190.00000
Centennial 107.78378
Chinatown 190.02232
Downtown 174.64260
Eastside 134.10644
Northwest 80.65734
South Summerlin 211.89062
Southeast 116.44780
Southwest 144.33553
Spring Valley 144.63977
Summerlin 98.70526
Sunrise 36.75194
The Lakes 110.42105
The Strip 408.53507
University 89.81818
Westside 126.61025
# Mean and standard deviation of star ratings per neighborhood, ordered from
# the lowest average rating to the highest.
restaurants %>%
  group_by(neighborhood) %>%
  summarise(
    avg = mean(stars),
    sd = sd(stars)
  ) %>%
  arrange(avg) %>%
  knitr::kable()
neighborhood avg sd
Sunrise 3.240310 0.9418664
Centennial 3.333333 0.7929615
The Strip 3.364600 0.7062534
Northwest 3.405594 0.7781140
Summerlin 3.415789 0.7775187
Southeast 3.429234 0.8270031
Southwest 3.453947 0.8083899
Eastside 3.457983 0.8156988
South Summerlin 3.523438 0.4994417
Anthem 3.550000 0.7619420
University 3.572727 0.7227659
Westside 3.609131 0.7794122
Spring Valley 3.631124 0.7712010
Downtown 3.707581 0.7692319
Chinatown 3.712054 0.6668898
The Lakes 3.789474 0.4806185

Column

Center of Las Vegas: 36.1699° N, 115.1398° W. The Plotly map below shows the area within 0.5 degrees of latitude and longitude of this center.

# Scatter plot of restaurant locations inside a one-degree box centred on
# 36.1699 N, 115.1398 W, coloured by star rating. Hover text shows name,
# neighborhood, address and rating.
restaurants %>%
  filter(
    latitude > 35.6699, latitude < 36.6699,
    longitude > -115.6398, longitude < -114.6398
  ) %>%
  plot_ly(
    x = ~longitude,
    y = ~latitude,
    type = "scatter",
    mode = "markers",
    alpha = 0.5,
    color = ~stars,
    hoverinfo = 'text',
    text = ~paste(name, " @", neighborhood, "\n", address, "\n", city, ", ", state, postal_code, "\n", stars, "stars on Yelp")
  ) %>%
  layout(
    xaxis = list(title = "Longitude"),
    yaxis = list(title = "Latitude")
  )

Column

Plots of Restaurants

# Category rows for businesses tagged "Restaurants" or "Food", limited to a
# fixed list of popular categories.
popular <- categories %>%
  # ids of businesses carrying either tag
  filter(category %in% c("Restaurants", "Food")) %>%
  distinct(business_id) %>%
  # bring back every category of those businesses, then keep the popular ones
  left_join(categories, by = "business_id") %>%
  filter(
    category %in% c(
      "Bars", "Breakfast & Brunch", "Chinese", "Italian", "Mexican",
      "Chicken Wings", "Salad", "Sushi Bars", "Pizza", "Steakhouses",
      "Fast Food"
    )
  )

# Stacked bar chart: number of restaurants in each popular category per
# neighborhood.
restaurants %>%
  select(business_id, neighborhood) %>%
  # Explicit join key: states the intent and avoids the natural-join message
  # dplyr prints when `by` is omitted.
  inner_join(popular, by = "business_id") %>%
  # A business counts once per (neighborhood, category) pair.
  distinct() %>%
  group_by(neighborhood, category) %>%
  tally() %>%
  plotly::plot_ly(
    x = ~neighborhood,
    y = ~n,
    type = 'bar',
    color = ~category,
    hoverinfo = 'text',
    text = ~paste(neighborhood, " has ",
                  n, " ", category, " restaurants.")
  ) %>%
  layout(
    yaxis = list(title = "Restaurants"),
    xaxis = list(title = "", tickangle = -45),
    barmode = 'stack'
  )

Geographic Plot by Categories

# Scatter plot of restaurant locations coloured by popular category. A
# restaurant with several popular categories appears once per category.
restaurants %>%
  # Explicit join key: without `by`, the join would key on every column name
  # the two tables happen to share.
  inner_join(popular, by = "business_id") %>%
  plot_ly(
    x = ~longitude,
    y = ~latitude,
    type = "scatter",
    mode = "markers",
    alpha = 0.9,
    color = ~category,
    hoverinfo = 'text',
    text = ~paste(name, " @", neighborhood, "\n", address, "\n", city, ", ", state, postal_code, "\n", stars, "star", category, "on Yelp.")
  ) %>%
  layout(
    xaxis = list(title = "Longitude"),
    yaxis = list(title = "Latitude")
  )